{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "81a6f12e",
   "metadata": {},
   "source": [
    "下面几段代码展示朴素贝叶斯模型的训练和预测。这里使用的数据集为本书自制的Books数据集，包含约1万本图书的标题，分为3种主题。首先是预处理，针对文本分类的预处理主要包含以下步骤：\n",
    "\n",
    "- 通常可以将英文文本全部转换为小写，或者将中文内容全部转换为简体，等等，这一般不会改变文本内容。\n",
    "- 去除标点。英文中的标点符号和单词之间没有空格（如——“Hi, there!”），如果不去除标点，“Hi,”和“there!”会被识别为不同于“Hi”和“there”的两个词，这显然是不合理的。对于中文，移除标点一般也不会影响文本的内容。\n",
    "- 分词。中文汉字之间没有空格分隔，中文分词有时比英文分词更加困难，此处不再赘述。\n",
    "- 去除停用词（如“I”、“is”、“的”等）。这些词往往大量出现但没有具体含义。\n",
    "- 建立词表。通常会忽略语料库中频率非常低的词。\n",
    "- 将词转换为词表索引（ID），便于机器学习模型使用。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "e290d833",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['这是', '一个', '测试', '句子', '。']\n"
     ]
    }
   ],
   "source": [
    "import spacy\n",
    "nlp = spacy.load('zh_core_web_sm')\n",
    "doc = nlp(\"这是一个测试句子。\")\n",
    "print([token.text for token in doc])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "61929831",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting zh-core-web-sm==3.8.0\n",
      "  Downloading https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl (48.5 MB)\n",
      "     ---------------------------------------- 0.0/48.5 MB ? eta -:--:--\n",
      "     ---------------------------------------- 0.4/48.5 MB 13.5 MB/s eta 0:00:04\n",
      "      --------------------------------------- 1.0/48.5 MB 12.2 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.5/48.5 MB 11.9 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 12.0 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 12.0 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 12.0 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 12.0 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 12.0 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 12.0 MB/s eta 0:00:04\n",
      "     - -------------------------------------- 1.7/48.5 MB 4.3 MB/s eta 0:00:11\n",
      "     - -------------------------------------- 1.7/48.5 MB 4.3 MB/s eta 0:00:11\n",
      "     - -------------------------------------- 1.7/48.5 MB 4.3 MB/s eta 0:00:11\n",
      "     - -------------------------------------- 1.7/48.5 MB 4.3 MB/s eta 0:00:11\n",
      "     - -------------------------------------- 1.7/48.5 MB 4.3 MB/s eta 0:00:11\n",
      "     -- ------------------------------------- 3.5/48.5 MB 5.2 MB/s eta 0:00:09\n",
      "     ---- ----------------------------------- 5.0/48.5 MB 6.9 MB/s eta 0:00:07\n",
      "     ----- ---------------------------------- 7.0/48.5 MB 9.1 MB/s eta 0:00:05\n",
      "     ------ --------------------------------- 8.0/48.5 MB 10.1 MB/s eta 0:00:05\n",
      "     ------- -------------------------------- 8.6/48.5 MB 10.1 MB/s eta 0:00:04\n",
      "     ------- -------------------------------- 9.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     ------- -------------------------------- 9.6/48.5 MB 10.3 MB/s eta 0:00:04\n",
      "     -------- ------------------------------ 10.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     -------- ------------------------------ 10.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     -------- ------------------------------ 10.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     -------- ------------------------------ 10.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     -------- ------------------------------ 10.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     -------- ------------------------------ 10.1/48.5 MB 10.2 MB/s eta 0:00:04\n",
      "     -------- ------------------------------- 10.2/48.5 MB 8.1 MB/s eta 0:00:05\n",
      "     -------- ------------------------------- 10.2/48.5 MB 8.0 MB/s eta 0:00:05\n",
      "     -------- ------------------------------- 10.2/48.5 MB 8.0 MB/s eta 0:00:05\n",
      "     -------- ------------------------------- 10.2/48.5 MB 8.0 MB/s eta 0:00:05\n",
      "     -------- ------------------------------- 10.2/48.5 MB 8.0 MB/s eta 0:00:05\n",
      "     -------- ------------------------------- 10.7/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     --------- ------------------------------ 11.3/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     --------- ------------------------------ 11.3/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     --------- ------------------------------ 11.3/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     --------- ------------------------------ 11.3/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     --------- ------------------------------ 11.3/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     --------- ------------------------------ 11.3/48.5 MB 7.1 MB/s eta 0:00:06\n",
      "     ---------- ----------------------------- 13.1/48.5 MB 9.0 MB/s eta 0:00:04\n",
      "     -------------- ------------------------- 17.4/48.5 MB 9.4 MB/s eta 0:00:04\n",
      "     --------------- ------------------------ 18.4/48.5 MB 9.9 MB/s eta 0:00:04\n",
      "     --------------- ------------------------ 18.4/48.5 MB 9.9 MB/s eta 0:00:04\n",
      "     --------------- ------------------------ 18.4/48.5 MB 9.9 MB/s eta 0:00:04\n",
      "     --------------- ------------------------ 18.9/48.5 MB 8.7 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.6/48.5 MB 9.0 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.8/48.5 MB 8.8 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.8/48.5 MB 8.8 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.8/48.5 MB 8.8 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.8/48.5 MB 8.8 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.8/48.5 MB 8.8 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.8/48.5 MB 8.8 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.9/48.5 MB 7.2 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.9/48.5 MB 7.2 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.9/48.5 MB 7.2 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.9/48.5 MB 7.2 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 19.9/48.5 MB 7.2 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 20.3/48.5 MB 7.4 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 20.3/48.5 MB 7.4 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 20.3/48.5 MB 7.4 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 20.3/48.5 MB 7.4 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 20.3/48.5 MB 7.4 MB/s eta 0:00:04\n",
      "     ---------------- ----------------------- 20.5/48.5 MB 7.4 MB/s eta 0:00:04\n",
      "     ------------------ --------------------- 22.9/48.5 MB 9.4 MB/s eta 0:00:03\n",
      "     ---------------------- ----------------- 27.3/48.5 MB 9.4 MB/s eta 0:00:03\n",
      "     ---------------------- ----------------- 27.3/48.5 MB 9.4 MB/s eta 0:00:03\n",
      "     ---------------------- ----------------- 27.3/48.5 MB 9.4 MB/s eta 0:00:03\n",
      "     ---------------------- ----------------- 27.7/48.5 MB 8.4 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 28.2/48.5 MB 8.1 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 28.7/48.5 MB 8.8 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 29.0/48.5 MB 8.7 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 29.0/48.5 MB 8.7 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 29.0/48.5 MB 8.7 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 29.0/48.5 MB 8.7 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 29.0/48.5 MB 8.7 MB/s eta 0:00:03\n",
      "     ----------------------- ---------------- 29.0/48.5 MB 8.7 MB/s eta 0:00:03\n",
      "     ------------------------ --------------- 29.9/48.5 MB 7.3 MB/s eta 0:00:03\n",
      "     ------------------------ -------------- 30.9/48.5 MB 15.2 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     ------------------------- ------------- 31.8/48.5 MB 14.9 MB/s eta 0:00:02\n",
      "     --------------------------- ------------ 33.6/48.5 MB 9.0 MB/s eta 0:00:02\n",
      "     ---------------------------- ----------- 34.7/48.5 MB 8.6 MB/s eta 0:00:02\n",
      "     ----------------------------- ---------- 35.9/48.5 MB 8.5 MB/s eta 0:00:02\n",
      "     -------------------------------- ------ 39.9/48.5 MB 14.9 MB/s eta 0:00:01\n",
      "     -------------------------------- ------ 39.9/48.5 MB 14.9 MB/s eta 0:00:01\n",
      "     -------------------------------- ------ 40.4/48.5 MB 13.4 MB/s eta 0:00:01\n",
      "     -------------------------------- ------ 40.9/48.5 MB 13.1 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.5/48.5 MB 12.8 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.7/48.5 MB 12.6 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.7/48.5 MB 12.6 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.7/48.5 MB 12.6 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.7/48.5 MB 12.6 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.7/48.5 MB 12.6 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 41.7/48.5 MB 12.6 MB/s eta 0:00:01\n",
      "     ---------------------------------- ----- 41.7/48.5 MB 9.5 MB/s eta 0:00:01\n",
      "     ---------------------------------- ----- 41.7/48.5 MB 9.5 MB/s eta 0:00:01\n",
      "     ---------------------------------- ----- 41.7/48.5 MB 9.5 MB/s eta 0:00:01\n",
      "     ---------------------------------- ----- 41.7/48.5 MB 9.5 MB/s eta 0:00:01\n",
      "     ---------------------------------- ----- 41.7/48.5 MB 9.5 MB/s eta 0:00:01\n",
      "     --------------------------------- ----- 42.1/48.5 MB 11.7 MB/s eta 0:00:01\n",
      "     ---------------------------------- ---- 43.2/48.5 MB 11.1 MB/s eta 0:00:01\n",
      "     ------------------------------------ -- 45.9/48.5 MB 11.7 MB/s eta 0:00:01\n",
      "     ------------------------------------- - 46.9/48.5 MB 11.5 MB/s eta 0:00:01\n",
      "     --------------------------------------  48.4/48.5 MB 10.9 MB/s eta 0:00:01\n",
      "     --------------------------------------  48.5/48.5 MB 10.7 MB/s eta 0:00:01\n",
      "     ---------------------------------------- 48.5/48.5 MB 9.9 MB/s eta 0:00:00\n",
      "Requirement already satisfied: spacy-pkuseg<2.0.0,>=1.0.0 in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from zh-core-web-sm==3.8.0) (1.0.0)\n",
      "Requirement already satisfied: srsly<3.0.0,>=2.3.0 in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from spacy-pkuseg<2.0.0,>=1.0.0->zh-core-web-sm==3.8.0) (2.5.1)\n",
      "Requirement already satisfied: numpy<3.0.0,>=2.0.0 in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from spacy-pkuseg<2.0.0,>=1.0.0->zh-core-web-sm==3.8.0) (2.2.5)\n",
      "Requirement already satisfied: catalogue<2.1.0,>=2.0.3 in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from srsly<3.0.0,>=2.3.0->spacy-pkuseg<2.0.0,>=1.0.0->zh-core-web-sm==3.8.0) (2.0.10)\n",
      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
      "You can now load the package via spacy.load('zh_core_web_sm')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip is available: 24.0 -> 25.1.1\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "!python -m spacy download zh_core_web_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "5936ceb0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train size = 8627 , test size = 2157\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 8627/8627 [00:34<00:00, 249.55it/s]\n",
      "100%|██████████| 2157/2157 [00:08<00:00, 264.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['python', '编程', '入门', '教程']\n",
      "{'计算机类': 0, '艺术传媒类': 1, '经管类': 2}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from tqdm import tqdm\n",
    "from collections import defaultdict\n",
    "from spacy.lang.zh.stop_words import STOP_WORDS\n",
    "import spacy\n",
    "\n",
    "# 加载中文分词模型\n",
    "nlp = spacy.load('zh_core_web_sm')\n",
    "\n",
    "class BooksDataset:\n",
    "    def __init__(self):\n",
    "        train_file = r'F:\\vscode\\社会舆情\\@Hands-on-NLP-main\\train.jsonl'\n",
    "        test_file = r'F:\\vscode\\社会舆情\\@Hands-on-NLP-main\\test.jsonl'\n",
    "\n",
    "        # 下载数据为JSON格式，转化为Python对象\n",
    "        def read_file(file_name):\n",
    "            with open(file_name, 'r', encoding='utf-8') as fin:\n",
    "                json_list = list(fin)\n",
    "            data_split = []\n",
    "            for json_str in json_list:\n",
    "                data_split.append(json.loads(json_str))\n",
    "            return data_split\n",
    "\n",
    "        self.train_data, self.test_data = read_file(train_file), read_file(test_file)\n",
    "        print('train size =', len(self.train_data), ', test size =', len(self.test_data))\n",
    "        \n",
    "        # 建立文本标签和数字标签的映射\n",
    "        self.label2id, self.id2label = {}, {}\n",
    "        for data_split in [self.train_data, self.test_data]:\n",
    "            for data in data_split:\n",
    "                txt = data['class']\n",
    "                if txt not in self.label2id:\n",
    "                    idx = len(self.label2id)\n",
    "                    self.label2id[txt] = idx\n",
    "                    self.id2label[idx] = txt\n",
    "                label_id = self.label2id[txt]\n",
    "                data['label'] = label_id\n",
    "\n",
    "    def tokenize(self, attr='book'):\n",
    "        # 去除文本中的符号和停用词\n",
    "        for data_split in [self.train_data, self.test_data]:\n",
    "            for data in tqdm(data_split):\n",
    "                # 转为小写\n",
    "                text = data[attr].lower()\n",
    "                # 符号替换为空\n",
    "                tokens = [t.text for t in nlp(text) if t.text not in STOP_WORDS]\n",
    "                # 这一步比较耗时，因此把tokenize的结果储存起来\n",
    "                data['tokens'] = tokens\n",
    "\n",
    "    # 根据分词结果建立词表，忽略部分低频词，\n",
    "    # 可以设置词最短长度和词表最大大小\n",
    "    def build_vocab(self, min_freq=3, min_len=2, max_size=None):\n",
    "        frequency = defaultdict(int)\n",
    "        for data in self.train_data:\n",
    "            tokens = data['tokens']\n",
    "            for token in tokens:\n",
    "                frequency[token] += 1 \n",
    "\n",
    "        print(f'unique tokens = {len(frequency)}, '+\\\n",
    "              f'total counts = {sum(frequency.values())}, '+\\\n",
    "              f'max freq = {max(frequency.values())}, '+\\\n",
    "              f'min freq = {min(frequency.values())}')    \n",
    "\n",
    "        self.token2id = {}\n",
    "        self.id2token = {}\n",
    "        total_count = 0\n",
    "        for token, freq in sorted(frequency.items(), key=lambda x: -x[1]):\n",
    "            if max_size and len(self.token2id) >= max_size:\n",
    "                break\n",
    "            if freq > min_freq:\n",
    "                if (min_len is None) or (min_len and len(token) >= min_len):\n",
    "                    self.token2id[token] = len(self.token2id)\n",
    "                    self.id2token[len(self.id2token)] = token\n",
    "                    total_count += freq\n",
    "            else:\n",
    "                break\n",
    "        print(f'min_freq = {min_freq}, min_len = {min_len}, '+\\\n",
    "              f'max_size = {max_size}, '\n",
    "              f'remaining tokens = {len(self.token2id)}, '\n",
    "              f'in-vocab rate = {total_count / sum(frequency.values())}')\n",
    "\n",
    "    # 将分词后的结果转化为数字索引\n",
    "    def convert_tokens_to_ids(self):\n",
    "        for data_split in [self.train_data, self.test_data]:\n",
    "            for data in data_split:\n",
    "                data['token_ids'] = []\n",
    "                for token in data['tokens']:\n",
    "                    if token in self.token2id:\n",
    "                        data['token_ids'].append(self.token2id[token])\n",
    "\n",
    "# 测试代码\n",
    "dataset = BooksDataset()\n",
    "dataset.tokenize()\n",
    "print(dataset.train_data[0]['tokens'])\n",
    "print(dataset.label2id)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26d79e05",
   "metadata": {},
   "source": [
    "完成分词后，对出现次数超过3次的词元建立词表，并将分词后的文档转化为词元id的序列。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "b3b4c04f-99e6-4b04-91c9-7603ea5f7100",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['c:\\\\Users\\\\Mr Jige\\\\AppData\\\\Local\\\\Programs\\\\Python\\\\Python312', 'c:\\\\Users\\\\Mr Jige\\\\AppData\\\\Local\\\\Programs\\\\Python\\\\Python312\\\\Lib\\\\site-packages']\n"
     ]
    }
   ],
   "source": [
    "import site\n",
    "print(site.getsitepackages())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "0d6b1918",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "unique tokens = 6956, total counts = 54884, max freq = 1635, min freq = 1\n",
      "min_freq = 3, min_len = 2, max_size = None, remaining tokens = 1650, in-vocab rate = 0.7944209605713869\n",
      "[18, 26, 5, 0]\n"
     ]
    }
   ],
   "source": [
    "dataset.build_vocab(min_freq=3)\n",
    "dataset.convert_tokens_to_ids()\n",
    "print(dataset.train_data[0]['token_ids'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d096d95f",
   "metadata": {},
   "source": [
    "接下来将数据和标签准备成便于训练的矩阵格式。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ba632265",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "train_X, train_Y = [], []\n",
    "test_X, test_Y = [], []\n",
    "\n",
    "for data in dataset.train_data:\n",
    "    x = np.zeros(len(dataset.token2id), dtype=np.int32)\n",
    "    for token_id in data['token_ids']:\n",
    "        x[token_id] += 1\n",
    "    train_X.append(x)\n",
    "    train_Y.append(data['label'])\n",
    "for data in dataset.test_data:\n",
    "    x = np.zeros(len(dataset.token2id), dtype=np.int32)\n",
    "    for token_id in data['token_ids']:\n",
    "        x[token_id] += 1\n",
    "    test_X.append(x)\n",
    "    test_Y.append(data['label'])\n",
    "train_X, train_Y = np.array(train_X), np.array(train_Y)\n",
    "test_X, test_Y = np.array(test_X), np.array(test_Y)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3938acdb",
   "metadata": {},
   "source": [
    "下面代码展示朴素贝叶斯的训练和预测。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f13251b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "P(计算机类) = 0.4453460067230787\n",
      "P(艺术传媒类) = 0.26660484525327466\n",
      "P(经管类) = 0.2880491480236467\n",
      "P(教程|计算机类) = 0.5726495726495726\n",
      "P(基础|计算机类) = 0.6503006012024048\n",
      "P(设计|计算机类) = 0.606694560669456\n",
      "test example-0, prediction = 0, label = 0\n",
      "test example-1, prediction = 0, label = 0\n",
      "test example-2, prediction = 1, label = 1\n",
      "test example-3, prediction = 1, label = 1\n",
      "test example-4, prediction = 1, label = 1\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "class NaiveBayes:\n",
    "    def __init__(self, num_classes, vocab_size):\n",
    "        self.num_classes = num_classes\n",
    "        self.vocab_size = vocab_size\n",
    "        self.prior = np.zeros(num_classes, dtype=np.float64)\n",
    "        self.likelihood = np.zeros((num_classes, vocab_size),\\\n",
    "            dtype=np.float64)\n",
    "        \n",
    "    def fit(self, X, Y):\n",
    "        # NaiveBayes的训练主要涉及先验概率和似然的估计，\n",
    "        # 这两者都可以通过计数简单获得\n",
    "        for x, y in zip(X, Y):\n",
    "            self.prior[y] += 1\n",
    "            for token_id in x:\n",
    "                self.likelihood[y, token_id] += 1\n",
    "                \n",
    "        self.prior /= self.prior.sum()\n",
    "        # laplace平滑\n",
    "        self.likelihood += 1\n",
    "        self.likelihood /= self.likelihood.sum(axis=0)\n",
    "        # 为了避免精度溢出，使用对数概率\n",
    "        self.prior = np.log(self.prior)\n",
    "        self.likelihood = np.log(self.likelihood)\n",
    "    \n",
    "    def predict(self, X):\n",
    "        # 算出各个类别的先验概率与似然的乘积，找出最大的作为分类结果\n",
    "        preds = []\n",
    "        for x in X:\n",
    "            p = np.zeros(self.num_classes, dtype=np.float64)\n",
    "            for i in range(self.num_classes):\n",
    "                p[i] += self.prior[i]\n",
    "                for token in x:\n",
    "                    p[i] += self.likelihood[i, token]\n",
    "            preds.append(np.argmax(p))\n",
    "        return preds\n",
    "\n",
    "nb = NaiveBayes(len(dataset.label2id), len(dataset.token2id))\n",
    "train_X, train_Y = [], []\n",
    "for data in dataset.train_data:\n",
    "    train_X.append(data['token_ids'])\n",
    "    train_Y.append(data['label'])\n",
    "nb.fit(train_X, train_Y)\n",
    "\n",
    "for i in range(3):\n",
    "    print(f'P({dataset.id2label[i]}) = {np.exp(nb.prior[i])}')\n",
    "for i in range(3):\n",
    "    print(f'P({dataset.id2token[i]}|{dataset.id2label[0]}) = '+\\\n",
    "          f'{np.exp(nb.likelihood[0, i])}')\n",
    "\n",
    "test_X, test_Y = [], []\n",
    "for data in dataset.test_data:\n",
    "    test_X.append(data['token_ids'])\n",
    "    test_Y.append(data['label'])\n",
    "    \n",
    "NB_preds = nb.predict(test_X)\n",
    "    \n",
    "for i, (p, y) in enumerate(zip(NB_preds, test_Y)):\n",
    "    if i >= 5:\n",
    "        break\n",
    "    print(f'test example-{i}, prediction = {p}, label = {y}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1cf6399",
   "metadata": {},
   "source": [
    "下面使用第3章介绍的TF-IDF方法得到文档的特征向量，并使用PyTorch实现逻辑斯谛回归模型的训练和预测。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "8679bd59-83bf-4281-8847-9fa3335eb4d8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Mr Jige\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\utils\\__init__.py\n"
     ]
    }
   ],
   "source": [
    "import utils\n",
    "print(utils.__file__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "1bc70877",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: nltk in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (3.9.1)\n",
      "Requirement already satisfied: click in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from nltk) (8.1.8)\n",
      "Requirement already satisfied: joblib in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from nltk) (1.4.2)\n",
      "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from nltk) (2024.11.6)\n",
      "Requirement already satisfied: tqdm in c:\\users\\mr jige\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from nltk) (4.67.1)\n",
      "Requirement already satisfied: colorama in c:\\users\\mr jige\\appdata\\roaming\\python\\python312\\site-packages (from click->nltk) (0.4.6)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip is available: 24.0 -> 25.1.1\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "pip install nltk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "21a3bc79",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "sys.path.append('../code')\n",
    "from my_utils import TFIDF  # utils 和系统中的 utils 库重名了，如上代码显示，因此可以将自己编写的 utils 重命名为 my_utils\n",
    "        \n",
    "tfidf = TFIDF(len(dataset.token2id))\n",
    "tfidf.fit(train_X)\n",
    "train_F = tfidf.transform(train_X)\n",
    "test_F = tfidf.transform(test_X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc8af30b",
   "metadata": {},
   "source": [
    "逻辑斯谛回归可以看作一个一层的神经网络模型，使用PyTorch实现可以方便地利用自动求导功能。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "1ddebf0c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "epoch-49, loss=0.2611: 100%|█| 50/50 [00:07<00:00,  6.26it/s\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAGwCAYAAABGogSnAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAViFJREFUeJzt3Qd4U9X7B/C3u5QOCqUto6VsKLuUQtlIoQwBFRQBWTIEREFABdmggiBDEUFFwL8/FByIg02lLAtlFcpehZbRMgrdu/k/55SkSXOTZq/7/TxPSHJzc3NySZu357zvOXYSiURCAAAAACJhb+4GAAAAAJgSgh8AAAAQFQQ/AAAAICoIfgAAAEBUEPwAAACAqCD4AQAAAFFB8AMAAACi4kgiU1xcTPfv3ycPDw+ys7Mzd3MAAABAA2xawoyMDKpevTrZ2+vXdyO64IcFPgEBAeZuBgAAAOggKSmJatasSfoQXfDDenykJ8/T09PczQEAAAANpKen884L6fe4PkQX/EiHuljgg+AHAADAuhgiZQUJzwAAACAqCH4AAABAVBD8AAAAgKgg+AEAAABRQfADAAAAooLgBwAAAEQFwQ8AAACICoIfAAAAEBUEPwAAACAqCH4AAABAVBD8AAAAgKgg+AEAAABRQfBjIEXFEnqYnku3H2eZuykAAACgBoIfA4m5+YTCPo2iMT+cNHdTAAAAQA0EPwZS1cOFX998lEUPM3LN3RwAAABQAcGPgdT0riC7/eupu2ZtCwAAAKiG4MdAKro4Uvu6Vfjts4nPzN0cAAAAUAHBjwFN79mAXx+4nELFxRJzNwcAAAAEIPgxoCbVvWS3919OMWtbAAAAQBiCHwNydXKQ3X7rx9O8/B0AAAAsC4IfA+sR7Ce7fflBulnbAgAAAMoQ/BjY4NAA2e3UrHyztgUAAACUIfgxsO6NfWW3bz3KNGtbAAAAQBmCHwOzs7Oj6T1Kqr4W/H2JJBLk/QAAAFgSBD9GUKequ+z2n3H3zdoWAAAAUITgxwi6NKwqu71871WztgUAAAAUIfgxAncXR/J9vtZXXmGxuZsDAAAAchD8GMmqwS359ePMPErLLjB3cwAAAOA5BD9G0q5OyTpfzMhNsWZtCwAAAJRC8GMkDvZ2sttxSVjoFAAAwFIg+DGisKDKsttPMeEhAACARUDwY0T/NyZMdrvV4v1Y6wsAAMACIPgx0UKnzJVkrPUFAABgbgh+TOgvTHgIAABgdgh+TOibw7foQVqOuZsBAAAgagh+TOyrf2+YuwkAAACihuDHyAIruync33Ii0WxtAQAAAAQ/Rvf9yFClbRj6AgAAMB8EP0ZW38+Dtk9qr7Dt0NVHZmsPAACA2CH4MYGQQG+F+zO3x5utLQAAAGKH4MdEBrWuae4mAAAAAIIf03kVwQ8AAIBFQPBjIm3lVnln+q05Shm5BWZrDwAAgFgh+DGT+Htp9NVBzPkDAABgagh+zOjuU5S8AwAAmBqCHzPaef4BXbqPxU4BAABEE/wcPnyY+vXrR9WrVyc7OzvasWNHuc+Jjo6mkJAQcnFxoXr16tHmzZvJWrwX0UBp28B1/5mlLQAAAGJl1uAnKyuLWrRoQWvXrtVo/4SEBOrbty9169aN4uLiaOrUqTR27Fjau3cvWYMhYQFK23IKiszSFgAAALFyNOeL9+7dm180tX79eqpduzatWLGC32/cuDEdPXqUVq1aRZGRkWTpfD1dzd0EAAAA0bOqnJ+YmBiKiIhQ2MaCHrZdlby8PEpPT1e4WJonmXnmbgIAAIBoWFXwk5ycTH5+fgrb2H0W0OTkCFdOLVmyhLy8vGSXgADloSdT+nVCuNK25XuvmqUtAAAAYmRVwY8uZs2aRWlpabJLUlKSWdvTJqgyLezfRGHb1pNJdPdpttnaBAAAICZWFfz4+/tTSkqKwjZ239PTkypUqCD4HFYVxh6Xv5ibnZ3yto6fHaSfTiSaozkAAACi
YlXBT3h4OEVFRSls279/P99uTSQS4e0f/YHV3gEAAGw6+MnMzOQl6+wiLWVntxMTE2VDViNGjJDtP2HCBLp16xZ98MEHdOXKFfr666/pl19+offee4+syQuNfFU+9mXUdZO2BQAAQGzMGvycOnWKWrVqxS/MtGnT+O158+bx+w8ePJAFQgwrc9+5cyfv7WHzA7GS9w0bNlhFmbu8gMpu9OOYMMHHVu6/ZvL2AAAAiImdRKJqEMY2scowVvXFkp/Nnf/DAh2hnp7bS/uapT0AAABi+P62qpwfWzOth/JyF0x2fqHJ2wIAACAWCH7MbGpEfaVtmXkIfgAAAIwFwY+ZRTRWnLSREddAJAAAgGkh+DEzRwflSX/aL/3XLG0BAAAQAwQ/ZuZorxz8FBVLSGR56AAAACaD4MfMvN2cBbfXnrWLUrPyTd4eAAAAW4fgx8yquLuofGztwRsmbQsAAIAYIPixYLkFReZuAgAAgM1B8GMB3uxQW3D7lhOJNGs71vsCAAAwJAQ/FmBm70b0vzFtBR/7OTYRkx4CAAAYEIIfC+DsaE8d6/uQQOEXtwrrfQEAABgMgh8LEla7suD2744kUFJqtsnbAwAAYIsQ/FiJ7Wfu0Y6z9/gcQAAAAKA7Rz2eCya06kDJ0Nez7Hwa1aE2PUzPpcoVncnRAfErAACANvDNaWUOX39M8XfTKOzTKBq0PsbczQEAALA6CH4syJy+weXuU1gsoV9PJ/HbcUnPTNAqAAAA24Lgx4I0reFFFxZGqt2nqLiYHqTlmqxNAAAAtgbBj4Vxd3GkIx90U/n4sRtPaP+lFJO2CQAAwJYg+LFAAZXdaOVrLczdDAAAAJuE4MdCvRJS09xNAAAAsEkIfgAAAEBUEPwAAACAqCD4AQAAAFFB8GPlklH2DgAAoBUEP1Zu0Pr/zN0EAAAAq4Lgx8rdfZpj7iYAAABYFQQ/FmxU+yB+/XqbAHM3BQAAwGYg+LFg8/sF09m5PWjpwOZaPS8nv4gW/X2JYhNSjdY2AAAAa4Xgx4LZ2dmRd0VnrZ+3LvoGbTyWQK99g1XfAQAAykLwYyUGh6oe+jp45aHC/VuPs0zQIgAAAOuE4MdKfDaoOf00tq3gY6M3n+TXV5LTqfcXRyjqsmIwBAAAAKUc5W6DhWtfz0flYz/8d5vm/3XRpO0BAACwRuj5sREIfAAAADSD4MfKrBnSytxNAAAAsGoIfqxMfT93czcBAADAqiH4sTJ+Hq7mbgIAAIBVQ/BjZXSZ9wcAAABKIfixcUXFEnM3AQAAwKIg+LFxR288NncTAAAALAqCHxuXX1hs7iYAAABYFAQ/Vqh7I19zNwEAAMBqIfixQt+NCKXzC3pqtG9yWg49y843epsAAACsBYIfK2Rvb0eerk4a7Tv3z4vU9tMokkiQ+AwAAMAg+BGBvMJiOn83zdzNAAAAsAgIfkRi4Lr/zN0EAAAAi4DgRyQKMd8PAAAAh+BHRG48zKQ3N5+ks4lPzd0UAAAAs0HwIyJjfjhJ/155SC9/jSEwAAAQLwQ/InLnSbbsdm5BkVnbAgAAYC4IfmxAnaoVZbcX9AvW6DlPsjD3DwAAiBOCHxvQp2k1rVd9/+VkkhFbBAAAYLkQ/NgAO7vS2/byd9T4Iuo6peUUGK9RAAAAFgrBjw1g4c6QsABq6OdBPYL9qKKzg0bPy8orVNqGmaABAMDWOZq7AaA7Vyd7yi0opi4Nq1LrWpVl2x3sNev9KSoz98+V5HQa/n0sTY2oT8Pa1jJ4ewEAACwBen6sWMzM7vT35I4KgQ+zaEBTjZ6fkVso6+35M+4evbouhh5l5NHsPy4Ypb0AAACWAMGPFWPJzc1qeiltf6lVDY2e3+fLI3TzUSbtu5RCU7bGUYbAMBgAAICtwbCXyHVfcYjCaiv2HAEAANgy9PwAxSakmrsJAAAAJoPgx0b9OCbM3E0AAACwSAh+bFQDPw9zNwEAAMAimT34Wbt2LQUFBZGrqyu1bduWYmNj
1e6/evVqatiwIVWoUIECAgLovffeo9zcXJO111r4ebrSsoHNaeOoUHqxeekM0AAAAGJn1oTnbdu20bRp02j9+vU88GGBTWRkJF29epV8fX2V9v/pp59o5syZtHHjRmrfvj1du3aNRo0aRXZ2drRy5UqzvAdL9lqbAH79QiM/+uf8TnM3BwAAwCKYteeHBSzjxo2j0aNHU3BwMA+C3NzceHAj5L///qMOHTrQ0KFDeW9Rz549aciQIWp7i/Ly8ig9PV3hIkY3Pumt1f6FRcVGawsAAIAog5/8/Hw6ffo0RURElDbG3p7fj4mJEXwO6+1hz5EGO7du3aJdu3ZRnz59VL7OkiVLyMvLS3ZhQ2Vi5Oig3X91vdm7KWjmTtp8LMFobQIAABBV8PP48WMqKioiPz8/he3sfnJysuBzWI/PokWLqGPHjuTk5ER169alrl270kcffaTydWbNmkVpaWmyS1ISVjPXxoK/L5m7CQAAALaV8KyN6Oho+vTTT+nrr7+mM2fO0Pbt22nnzp20ePFilc9xcXEhT09PhQsAAACIl9kSnn18fMjBwYFSUlIUtrP7/v7+gs+ZO3cuDR8+nMaOHcvvN2vWjLKysmj8+PE0e/ZsPmwGqjXwc6drKZlaP+96SgZdvJ9OA1pW58nlAAAA1sxs0YKzszO1bt2aoqKiZNuKi4v5/fDwcMHnZGdnKwU4LICSLs4J6v01uSO1q6P9UhY9Vh2mqdvi6MDlh0ZpFwAAgCmZtauElbl/99139MMPP9Dly5dp4sSJvCeHVX8xI0aM4Dk7Uv369aN169bR1q1bKSEhgfbv3897g9h2aRAEqrk6OdDW8eHUupa3Ts+/cC/N4G0CAAAQ1Tw/gwcPpkePHtG8efN4knPLli1pz549siToxMREhZ6eOXPm8GEXdn3v3j2qWrUqD3w++eQTM74L63P6zlNzNwEAAMBs7CQiGy9i8/ywkndW+SXW5GdWwq6LqRH1aWpEA4O3BwAAwJTf38gQBgAAAFFB8CNCuiQ9M3aESi8AALB+CH5E6MshrejtbnVpy9i2Wj2PVbnfeJhBn+25Qs+y843WPgAAAGNCzo+I3X6cRV0/j9bpuX2bV6O1Q0MM3iYAAAAhyPkBg8jMK9T5ubEJqQZtCwAAgKkg+BGxGpUq6Pzc/EKs+g4AANYJwY+IeVd0pqjpXXR6blpOAX2+9yqlZRcYvF0AAADGhOBH5OpWdac2QbrN+PzVwRvUYtE+OnkbQ2AAAGA9EPwAfT2stV7PH7Ux1mBtAQAAMDYEP0BVPVzI01X3lU6y8osor7DIoG0CAAAwFgQ/wLE10/Txf//dMVhbAAAAjAnBD3BODvoFP3efZhusLQAAAMaE4Ae4b4aH6vV8NlPm1eQMung/jfZceECn76TSsA3H6dL9dIO1EQAAwBAwwzMo9N58fzSB6vhUpLl/XjTIMb3dnOjsvJ4GORYAAIhXOmZ4BmOo6e1G8/s1obq+7gY75lPMAwQAABYGwQ8oaVe7irmbAAAAYDQIfkCJvb1+yc8AAACWDMEPAAAAiAqCHwAAABAVBD8AAAAgKgh+wOgeZuSauwkAAAAyCH7A6MI+iaKDVx+auxkAAAAcgh8QtH1Se+oZ7Ec/jWtrkOON3nTSIMcBAADQl+5LeYNNCwn0pm9HlCx58enLzcirghO9/dMZczcLAABAb+j5gXINbRtIfZtXo4jGfnodJ2jmTpr+yzkqKCo2WNsAAAC0heAHNLZqcAvycXfW6xi/n7lL9Wfvps/2XDFYuwAAALSB4Ac05uHqRAdndDXIsdZF3zTIcQAAALSF4Ae0DoCm9Whg7mYAAADoDMEPaO3d7vUNcpzs/EKDHAcAAEAbCH7AbFbuu6ZwnyVCIyACAABjQ6k7mM2GownUtIYXLf7nEoUGedP5u2n0IC2XLi6MpIou+GgCAIBxoOcHdFLBycEgx5m6LY6eZOXT3ospPPBhvoy6bpBjAwAACEHwAzpZ8kozox37
m8O3KD23gPIKi2j1gWsUl/TMaK8FAADig+AHdOLn6WrU46fnFNCmY7dp9YHr9NLaY0Z9LQAAEBcEP2CRCoskdC05Q2Hbmqjr9MnOS2ZrEwAA2AYEP6ATCUlktyOb6LfshZCyS2AUF0toxf5r9N2RBEpKzTb46wEAgHgg+AGdFz6t6uFCbYK8af0brQ1+/Pwywc/GYwkqHwMAANAG6olBJ65ODhQz8wVysLcjOzs7gx8/+uojIrnDfrzzsuy2vRFeDwAAxAM9P6AzRwd7pcDnjXaBJN20bGBznY+9fO9V2n7mnuBjCH0AAEAf6PkBg+oR7E/TejSkx5l51MDPgz74/bzBXwM9PwAAoA/0/IDBVa7ozAMfY2GxT25BEW07mUjJzydGBAAA0BSCHzAoiaS0CowZEV7LKMHPyv3X6MPf42nA2qOy7XefZtPZxKcGfz0AALAtGPYCo1o0oCm5uzjS19E3DXbM3l8coYzckgVQU9LzZNs7fnaQXx+Y1oXq+bob7PUAAMC2oOcHjO6DXo0Mejxp4KPKhXtpBn09AACwLQh+wOAl8ObG1gQDAAAwaPDzww8/0M6dO2X3P/jgA6pUqRK1b9+e7ty5o8shwcq9H9mQXmlVg9rWrmzupvBcoEcZpcNhAAAAegc/n376KVWoUIHfjomJobVr19KyZcvIx8eH3nvvPV0OCVbu7W71aOXglionPPz0ZeOtAi/kl1NJJn09AACw8eAnKSmJ6tWrx2/v2LGDBg4cSOPHj6clS5bQkSNHDN1GsAEujsYbYd17MZmeZecrbPsi6rrRXg8AAKybTt9I7u7u9OTJE35737591KNHD37b1dWVcnJyDNtCsDkeLo708UtNDXa8t348TS0X7VfYll+I9b8AAMCApe4s2Bk7diy1atWKrl27Rn369OHbL168SEFBQbocEmyc/Ow/ozqY5jPC5v3x83QlJwfk9QMAQCmdvhVYjk94eDg9evSIfv/9d6pSpQrffvr0aRoyZIguhwQRmdClrkIwZCxs3p8h3x43wSsBAIDN9/ywyq6vvvpKafvChQsN0SawQe4upSXwFV0cydvNySSve+rOUyoqltDV5Axq6O/BV6EHAABx06nnZ8+ePXT06FGFnqCWLVvS0KFD6elTLC8AyiIa+1HfZtVoTt/G/H7vptXIx93FJK+9ZNdl6vPlEfp012WTvB4AANhg8PP+++9Teno6vx0fH0/Tp0/neT8JCQk0bdo0Q7cRbICjgz2tHRZCYzvV4fdZD8zJ2d1N8tobjibw6++fX0slpWbTwwwsjAoAIDY6DXuxICc4OJjfZjk/L774Ip/758yZM7LkZ4DyqJoTyJiy8wupWEJUVCShTstK1gK7vbSvydsBAABW1vPj7OxM2dnZ/PaBAweoZ8+e/HblypVlPUIAmpjfL5iCqrhRAz/TLEQaPG8vNZ2/l64/zDDJ6wEAgI0EPx07duTDW4sXL6bY2Fjq27fkL2dW9l6zZk1DtxFs2OgOtSn6/W5U38/DpK+L5S8AAMRLp+CHVXo5OjrSb7/9RuvWraMaNWrw7bt376ZevXoZuo0gAtJEaHOQSExReA8AAFad8xMYGEj//POP0vZVq1YZok0gQtW8StaKMxX5dCNWCu/ogBJ4AACx0Hnq26KiIp7s/PHHH/PLH3/8wbdpi5XJs1mh2dIYbdu25cNo6jx79ozefvttqlatGrm4uFCDBg1o165dur4NsEAtanqZ4FVKg50i9PwAAIiKTj0/N27c4FVd9+7do4YNG/JtbFHTgIAA2rlzJ9WtW1ej42zbto3nDq1fv54HPqtXr6bIyEi6evUq+fr6Ku2fn5/Pl9Zgj7EhNzbcdufOHT7pIli/70aE0tHrj6hRNU86dzfeZD0/v5xMouHhmi25kZZTQP9eSaGewf58skYAALA+dhIdEh5Y4MOetmXLFl7hxbCFTt944w2yt7fnAZAmWMDTpk0b2WzRxcXFPIB65513aObMmUr7syBp
+fLldOXKFXJy0m2GYFaN5uXlRWlpaeTp6anTMcC4fjt9l2b8es6or9G9kS9FXXkou9+shhel5xaQn4crbR3fjuxVzATNlsuIufWEXmxejb4aGmLUNgIAgHG+v3Ua9jp06BAtW7ZMFvgwbH2vpUuX8sc0wXpx2FpgERERpY2xt+f3Y2JiBJ/z119/8TXF2LCXn58fNW3alM8vpG64LS8vj58w+QtYNlOsQCEf+DDx99LozpNsir2dSldTVJfBs8CH+ef8A6O3EQAAjEOn4Ifl2mRkKH9BZGZm8jmANPH48WMetLAgRh67n5ycLPicW7du8eEu9jyW5zN37lxasWIFzzlShQ3HsUhRemE9S2DZ7FVMfrj0lWYmeX0zzL0IAACWHvywGZ3Hjx9PJ06c4MNf7HL8+HGaMGEC9e/fn4yFDYuxfJ9vv/2WWrduTYMHD6bZs2fz4TBVZs2axbvIpJekpCSjtQ8Mw8lB+GPJZmY2BTu5ZGgAALA9OmVsfvnllzRy5Eg+BCXNvSkoKKABAwbwpGVN+Pj4kIODA6WkpChsZ/f9/f0Fn8MqvNjrsedJNW7cmPcUsWE0oV4n1kvFLmA9IoKVk92ZYhNVZWHhdwAA26ZTzw+rrvrzzz/5jM5sGIpd2G1W7q5p5RULVFjvTVRUlELPDrvPgiohHTp04JVmbD8p9rosKNJ0uA0sn4ujA8V+1J0mdFGsGgwJ9LboYa/cgiIqNlX3FAAAGL/np7zV2g8eLFkkklm5cqXGx2Q9SKGhoRQWFsZ7jbKysmj06NH88REjRvBydpa3w0ycOJFXhk2ZMoVXhF2/fp0nPL/77ruavg2wEr6erjSzdyNaf+imbFtwddNU5+nSwfQ4M49CPz5AbWtXpm1vCQfvAABgZcHP2bNnDb5SN8vZefToEc2bN48PXbVs2ZL27NkjS4JOTEzkFWBSLFl579699N5771Hz5s15YMQCoQ8//FDj1wTrFlC5AiWl5hj1NQoFem/2XUwm9zLz+rBcN+nnfXd8SfXXiYRUo7YNAABMGPzI9+wY0uTJk/lFSHR0tNI2NiTGkqtBHPo086dd8cnUqb6P0mO/TQinQeuFp0XQx54LyXQlOZ1eblWySO+DtBwa/+Nppf3CPo2i4e1q0bvd6xu8DQAAYDyYohYs2rJBLSiisR91b6w4JQLj426cRPYvoq7z6/Z1fcjP05XuPc1RuTL8yv3XSoIf1McDANj+2l4ApsCGml4JqUleFYRn9K5Vxc1or52RW8ivs/O1X7MOAAAsF4IfsGpbxrY12rEdnte8m6rEHgAATAPBD1iV2X2CFe7X9DZezw+LfW48zKC7Koa9FCBAAgCwGsj5AavSpUFVlY9V93Kl+2m5BnutS/fTaeKWMwY7HgAAWAb0/IBVkc8rLtvXwu6z1dYNZcq2OI3223LiDhWVKY8/cv0RDVz3H11Xs0gqAACYB4IfsCrlFVUtG9TcYK+VX1g6k7g6s/+4QD/HKq4ZN/z7WDp95ymN/b9T/D4Lgq4hEAIAsAgIfsCmuDk70piOtU3+ulflApv/bjyW3b7zJJsve9Fj1WHqueowvw0AAOaF4AesirODPTk5lHT/VK/kKphzPKdvYzKnhX9fUriflVdSMs9kyt0GAADzQMIzWBW2nET8gkieY8MWQJUneZ4FpM0SK8buBWLs5dqDhU8BAMwPPT9gdVydHKii3DpbbYJKVnt/tXWAbNuygYbL/dGXfCzGlsTYGptozuYAAIgeen7A6m0aHUanbqdSh3py63/JBRweLo6UYUHDTTO3x9PrYYHmbgYAgGih5wdsYgmMrg19yclB7uMsN7p0Zl4PMqfy5j9MzcrnK8QDAIBpIPgBm+cklyRtDurCmgOXUihk8X6as+OCCVsEACBuCH4AjEzd2mDL917l11tOaJ4H9Omuy7Rq/zWDtA0AQIyQ8wOiYMeTgMwztCQU+zzOzKNfT92l5HTtluNITsulbw/f4rcndaurVPEGAADl
Q88P2CRp2bs5SFeDl7VFIPqZ+L/T9NmeK5SWUyDbxnpzyi6ToW7W6YfpeSidBwDQAYIfsEkVnMt0apZJ+Wnk7yH4vJWvtdD7tfu3qK5wXyg+OXn7qdK2L6Ku0y+nFJfJ+PH4Heqw9F9KeJylVDbfadlBGvd8+QwAANAcgh+wSb2b+lOPYD+a1buR4OMTu9YV3O4oXzGmoz/O3tO5F+r2k5IgR2rujgt071kOzfz9vOD+UVceolIMAEBLCH7AZiu8vhsRSm91UQ5yXmjkS32aCa/+Xq+qu8Hbos3IlKphrBMJqfxaaPLq6GuPdG4bAIAYIfgB0dk4qo3inEBygqt7Gvz1tMnLUbdrXNIzweTpY9dLF1IFAIDyIfgBUahV2a3cfVTlAelrpRZl6SzhWVXS80trj9GqA8rH2nA0gZJSs/VqIwCAmCD4AVFgQ2AsB+iPSe1V7qOqN8jQOUDqbP7vNp/08FFGnuDj288IH2vZ8/mCAACgfAh+QBSCfCryAKhVYMkiqOpK1C8ujKSIxn5kLqz8/ceY22RuGbkFVFhUWloPAGArEPwAPOf4PPhhK8ZvGBlq1rbkF2lXwWXoxTsepudSswX7qN9Xxwx8ZAAA80PwA/CcoxnX/ypr/aGb9Cw7X+P9harA9LH/cgq/vvwg3bAHBgCwAAh+AJ57sbni5IT1fd2NmghdngOXH2q8r+WEbQAAlg9rewE8NzQsUOH+T+Pa0YHLKXzGZjYUFjRzp0nbo01AY2forh8AABuGnh8QLV8PF369dmgIHZ/VnezLrMlV1cOFhoQF8sDH0rGW5xUW0fFbT6hATZJybkERbY1NpAdpOSZtHwCAJbH83+oARhL9fldeUl6rSkWN9u9U34eOmHBCQW06c1jPz8zf43lZ/aj2QbSgfxOVcw6xVeF93J3p1JwehmssAIAVQc8PiJabs6PGgQ/z7fBQmqRiTTBjYOt2aRMoSecTYnMFqXLw+TEfZ2qeTA0AYGsQ/ABoqIKzA83o2dBkr7fz/AON90XGDwCA5hD8AGiB5QWdmhNBlqaozKJfv52+a7a2AABYOgQ/AFrycS9JlLYkZZe9mPHrOTqb+JRe/voYbTuZaLZ2AQBYIiQ8AxiBm7MDZecXmbUNL3/9H78+m/iMBrcpKeNHRTwAAHp+AHTCqqXU9QIhxgAAsFwIfgB08OuE9ryk/JvhIRrNFm0p7FSEZXeeZFF2fqHgYzcfZdLpO6lGbhkAgOkg+AHQQW2finwuHX+vCoKPz+8fLJtE0VLsjn9AV1MylLZfup9OXZZHU+dl0YLP677iEA1cF0NJqdl8ksQ1UdfpSnLJml9ZeYW0+sA1upqsfFwAAEuF4AfASHMIbXsrnCxFalY+TdxyRmHb+bvP+PX+SyWLmD7OzOPX+YXFVKZ4TNYD9GXUdVqx/xr1Wn2Eb1ux7xqtPnCdIlcfNv6bAAAwECQ8Axixd8hSxCY8UdrW/6tjlLCkD8XcKp21Oi27gMKXRgkma7OAKP5emsK2uKSnRmoxAIDxoOcHwIhCAiuRJfhbxYSJu+KT6fit0nyePRcfqKxSk5Byd5BABxEAgMVDzw+AEW0aHcYXG72ekkGf77tmtnaomi2azQck79ajLJXHYD0/8qvHX7iXxsvoAQCsDXp+AIzIq4ITRTbxJ0cHy/xRyylQ7OX55vAt9cGP3P0X1xw1YssAAIzHMn8jA1gJiVBmsOB+ZPUkJjhPAACmgOAHwASKbeDLnwUwuswQPfuPeIpYeYhyzDzjNQCAFIIfAD3I58Doo6KzA1m6n2IT1c5cvfDvi7TxaILS9i0nEunmoyzaGa/5KvWaSsspoOV7r9A1gfmLAABUQfADYALywz41KlUQnDHa0kVffaQ22Nt07DYt+ucSnbydyic//OlEIvX9smQ+IGP5+J9LtPbgTeq5CvMMAYDmUO0FYALyo1473+1IF+6l81mSP955mW8Lru5Jw9vVoh+P3yFLpkk/16vrY3R+rrbO31Wc
dwgAQBPo+QEwkFpV3Ph1I38PpcecHEt/1Cq5OVPH+j7kLLeNmfNiY+rVxJ8WDWii/HwHy1gqVZ9RvozcAtpzIZnPIK2Lw9ce0T/n7+veAACA5xD8ABhoOGvjqDY0uVs92jS6jdJ+w9oGUuNqnjQ1or5sW0RjP35dt2rJTNAujg60fnhrGhEepPR8oW3mcODyQ52fu+DvSzThf6fpk52XdHr+iI2xNPmns3T/WY7ObQAAYBD8ABhINS9XmhHZkKoJLHbq4epEu6d0oqkRDWTbqleqQGfn9qDdUzor7b9hRCj5eZYujGoZ/T6G8UNMydBeQVExLfr7Ev17pWRtMW3WKROadZqtPcbyjAqLdOtZAgDxQM4PgBl5V3QW3B4R7McvQTN38vuFxdZfKi9vXfRNsrcj2ngsgV9uL+2r9zxBbPV5aVA1sr1l9JQBgGVCzw+AFbC1SQI/23OFluy+onafomIJjf+/U7Qm6rpCwrh83pGdQJ/Y6TtYbBUA1EPwA6AHlqcjZW+gOX+EtAysRL2b+pOYHLzykPZdSqEV+68pTBIpH/BgsVUA0AWGvQD0UNXDhd6LaMArt1ydDD9RIUuejk1Ipf4tatDLrWrKhsFszY6z9/gkimuHhvBzyvx2+q7scflRv/JiTFvrJQMAw0PwA6CnKXIVXIbWraEvv9i6qdvi+DWbrfmzgc0p/l4a7bmYLHtc1dIYhUXCPT9syMyBJRUBAAhA8AMAFoMtg9F43h7KLVCs2Fp36KbSvmnZBXTrcZbS9tTMfGr98X4+Z9LSgc2N2l4AsE7I+QEAi8GSlcsGPsxtuSBHOuz1l4oJD2NuPaFn2QW09WRSua/HJlx8+6cz9HNsosZtzCssogSBoAsArAeCHwArJJ3xmc0HtHFUKNk6+SGwp1kF/NpJh2GttQdv0JBvj1NuQckw2vYzd2nn+Qc0a3u8xsdgy3d0+zyajlx/pPXrA4BlsIjgZ+3atRQUFESurq7Utm1bio2N1eh5W7du5QstvvTSS0ZvI4Al2D6pPX38UlO6srg3xc7uzucCeqFRyUzRYjHku+P8WpecnuV7r/KeoT/j7slWhdd1PbFfT5UmZBvbngsPeNCWnJZrstcEsGVmD362bdtG06ZNo/nz59OZM2eoRYsWFBkZSQ8fqp9G//bt2zRjxgzq1KmTydoKYG4hgd70Rrta/Ivf18NVtn3b+HbUrWFVhVmhbZ2Tg+6/vvJ0XF9Mnnz5vbFN+N8ZHrTN/+uCyV4TwJaZPfhZuXIljRs3jkaPHk3BwcG0fv16cnNzo40bN6p8TlFREQ0bNowWLlxIderUMWl7ASxR2zpVaNPoMKpVuWSdMDHQpOeHldBLh7jKM3XrWbqSnK7x65ujoP5ptvY9VQBgYcFPfn4+nT59miIiIkobZG/P78fExKh83qJFi8jX15fGjBlT7mvk5eVRenq6wgUArJ8mK92zEvrP915V2v5nnHKy9I64+9T/q2OaNwDTCQFYLbMGP48fP+a9OH5+ijkL7H5ycmmCo7yjR4/S999/T999951Gr7FkyRLy8vKSXQICAgzSdgBL1Ov5LNBs+Ktv82oKj/UItq3cIAd7e62TpeWrythcQBKB6i9V4u+m0cf/XCp32CspNZteXf8f7RV4XQCwDGYf9tJGRkYGDR8+nAc+Pj4+Gj1n1qxZlJaWJrskJZVf/gpgrdiCnt+PDKVd73ai7o1KJ0e8srgXfTdCsSosekZXslZbYxP5+mCakK4RtmKfYg8QC06WlrO+mLx+Xx2lDUcTFIIfFiydSXyqsJL8R3/E08nbT+mtH0+TPqIup9CFeyXJ1QBgQ5McsgDGwcGBUlJSFLaz+/7+yusY3bx5kyc69+vXT7atuLjkl46joyNdvXqV6tatq/AcFxcXfgEQSx5M98YlPTx9mlWj/x2/Q2G1qwguvRHkU5H+nd6Ff6FP7FKXMnILqc+XR8gazNSiNP1BWi6/sHXC5E3ackbt834/fZfScwto
dIfago+zwKfBnN389vjOdeijPo357dSsfKV9Wd7RhiO3eHVeI39Pta9792k2n6dozA+n+P3yVrwHACvr+XF2dqbWrVtTVFSUQjDD7oeHhyvt36hRI4qPj6e4uDjZpX///tStWzd+G0NaAKVYwLN9Ugea2buRbFtA5Qr8ur6vO7+uU9WdPn25GQVUdqPg6p705ZBWVKuKm9nabEmm/3qOFv59iX5RMVniwaul8/x8e/iW2mOt+fc6fb7vGvVafUTlxInMuaRn1PGzg/TimqNkDMXFEnp7yxnBPCgAMTH78haszH3kyJEUGhpKYWFhtHr1asrKyuLVX8yIESOoRo0aPHeHzQPUtGlThedXqlSJX5fdDgDKfhrbjr4/mkBjOgr3ZvRvUZ2qVHSmYRtOkFixZTOmbjsru//B7+fptTbl/2H197n7dPNRptq5gYQs2XWZvjl8i/55p6NgIrYhnbrzlHbGP+C3Z0Q2NOprAVgyswc/gwcPpkePHtG8efN4knPLli1pz549siToxMREXgEGAPpjPTwL+jdRu081r9L5g8Toi6jrCr06mnrn59KAqSx1UwKxwIf5fN9Vqlu1pEdOlYfpuXyCRjbfE/u/1JamZf8Ats7swQ8zefJkfhESHR2t9rmbN282UqsAxIkNhX01tBVdT8nkgYDYPM1WztnRl3JdmcA+GpTO336STVO2xpXcRi4QgM7QpQIASl5sXp3e7V5fMKHaxdG2f21k5hUa/JjP6zLUwrRBAKZj27/FAMCgMyi7OTlQJTen0vvOylVk1m5/maowQyiS69Y5eTuVngpUhEkkEtJ+tTI921Us0S1p+qcztEaEvYJgOxD8AIDGNowMpfVvtCZnB3tqXcubryoP5YtNSFVYFb7V4v205cQdepCWY7Y2seU8wj45oPXirsduPqad5x/Qiv3XjNY2AFHk/ACA9awhxlz9uBfZ2dlhEr5yZOQWkIdraU+ZvNl/XNAo54dVnxkDW86D+SvuHg0PD9L4eTn5hkuazs4vpJibT6hDPR/BuagAjAU9PwCg0opXW1CrwJLpJOSxwIcx4cLmZvVjzG2dntdswT6Nh4eO3nhMjzPzlLa3WLSPDEXov8uc/4VTt8bxyRwXyS0bAmAKCH4AQKWBrWvSH5M68CDop3FtlR5Xtb6VrZn750Wdn6vN8JC0N0YTY384JZscUR/m/C+Uzrr904lE8zUCRAnBDwBoFAS1r6u8np44Qh/LdOByCm0+VtojdftxFu2Of8ATp7VRdn92nx1H1YSNmjh45SHN2RFvknmFWHtHbYrlSdjGcOjaI35soSR1sF4IfgBAZ/I9P1U9sIaeqS3ZfYWePZ+XqOvn0TRxyxmFajUWfJQXgJQNlaKvPeLH6b7ikEb7Cxm9+ST973gi/fCfbsOF2khMzaboq494ErYxgq2RG2P5sZfsvmzwY4P5IPgBAJ3J9xqsGdLKrG0RK7aCvFBlGVtpvtWi/dRy0T7ZqvM5+cpzGJXtKGLri2mLLQD7c2wiJT7Jpj5flK5fxhaUVUXbHipVdKjW14m69wLWB9VeAKAz+e8vsS+LYS5fRl2nHsElywExhc+jAVbCnvO8J+RpdgHvmVsusKBp2dhBKCZhVVnsGDUqlSyMW9YHv56nPReTNW7zor8v0a7na4zpy1RzI0mT/E/fSSWvCs5U7/niwGCd0PMDADpzctDsV8i6YSF0YFpnwceGt6tFA1pWN3DLxCP+XhpN2Vq6rlj+814e++df1symYwk08/fzdPNRltLzF/9zSeVcP2weonXRN3nVWoel/9ItFXlAqgIfuSYo2HgsgZLTTdOTsudCMvVcdYiuJmfodRz2VpJSs2nguhiKWCk8JAjWA8EPAOiseU0vHrhM7lZPYfuQsEAaEV5Ldr93s2pUz9dD8Bjuro58nhfQnfxq8AWFxUr5WF9H36StJ5NUPv9jFaXmQ787QZ/tuSKbCbrs3ETGwIbDZm0/T989X/BVXxP+d5qupWTS
ZAMkRN/QIwkcLAuCHwDQayjgi9db0YzIhgrbF/QPpto+FdU+d1LXuhQWVJkmdKmLsjEDYsNeLPF3w9EEjZ9z6UG64PaEx4o9RTG3nmjVFjsdBqVYDtPPsUn0yS7tE4zVpRFl6blmG+vFMvXyI+UpeN7LB9pDzg8AGISPe2m1l6O9fbnzx3zQq1G58wX98GYYPcnMo2m/nDNcQ23cH2fv8fMp3xukzRpf5+5qn/CsLmBggRgrfe9Q34c8Vcx2rSpIYc/VZuZniZmi6JT0XFq6+wrv7WwV6K3186Mup1AN7wrUyN9T4+fM//MC/RSbSAemdaFaVdT/oSH16a7L/P9kVu/GJHbo+QEAg6jo4kjRM7rSkQ+68UVRhb6GxnSsza/fL9NTpOorq0uDqhr/YodS2gQ+zJXkDNp4NIEHG6xsXJ0NRzQfjnqWXUCN5u7hpfPNF+zjx9emt4IN12lDXcCtb1hkJ5f0XNaMX8/xoPPlr//T+rhsiRg2y3Wv1aVVcvJLiWw+lsBzjcr6IeYOFRRJeE6WJlKz8unbw7fom0O3tF7PzRah5wcADCZIbqjL2VH5b6s5fRvTG+1qUVAVN41nimYLqC55pRnN2h5v4NaCPLbExMMM5eU11JXWr9x/jVc/qfL7mbsK93+MuUMPMzRPdL6aLDwcp4qh+32kcyhJqRr2uiWQSK6paymqE7GX7b1Cm47d5rOExy+IFNxH0xkDpNMdMMWmmh/AgqHnBwCMYlBITQoJrETvRTSQbWN/ObNcoLJ/QZf3C5wlUIPxnUh4onWZ/bEbmj/nSVY+fXckQeOhK02+2FcfuGbwuYMYFqS1XLRfdl9Vr48xHb3+mF9n5OqXr8Qg3FGE4AcAjKKCswNtn9SBpkTU1+oX89iOtfmcND+OCdPr9THjtPbOJhou30cVVTHE19E3BNf+eufns7zXguUCsV6msr0W8uuhqfuC1zYukgYesnarabs+tGmXquAu/m4adVl+kJf1g2YQ/ACA2VWQS2qd82IwxX7UnTrVr6rXMU/OjqDzC3qq3WfXu530eg0wXMXUsj3KEzAyf5+7T7svJNOr62P4HDu/nEoy2yKtulSv6fV6doqzaLMlTD7ZqTwtwbj/O0V3nmTzsn7QDIIfADC7/i2qU9eGVWlW70YGHWJg1UVODqqP5e6CtEdTYv8TuqSbsBmmpeX4M7fH0wlVJfcaHJv1Ir22PoZP+ii/7ffTd+nu09LE4rIfwbL3pb0wC/66SPee5ZAhsAouVcHW1thEHuCwYUP5ijg2TKjthJF2ev54saVM3tsWp5BHZG0Q/ACA2bHk6M2jw+gtNuePCiw4YjoKTIjorGamaXPkaoCw6w/VTxLIqpuEem/K/h8O/vY4vy67cGphcTFdvJ8mmNArzSWKvZ3KL/KTPrKk4um/nqOuy6PV9vLIN0Pazs0GXLyVVXCpIv+WQhaX5iLpQqJnDxkrPmDVbf+cN8wSJeaA4AcArMLaoSG0YUQozesXrPTYZ4OaqXweKlssh/yK80L6f3WUPvpDuarv4r00wf3n/3VRaamOvl8eFVyBnXVSsOq0QwKl/EduPFZYF02YnVKFIkv4Nib5YEv+1fOez+Ktbt6mhX9f5KvRq3I1OYPidFjEVh4birNWCH4AwGrmEYoI9iMXgRL6l1rWoL1TOwsurqr+C00Ym3kazNMzlJKep1GPSL81R5W2SZOfpRVl8sNDjzPzeLDyTZllM7769zodvqYcEJXtMMzILaCziaVl/uxjxYIpfWn66ZRfq03h+QIH+Of8fd6b9baKJT3s7IgiVx+ml9Ye4/P/iBGCHwCwKmxl8YDKFZSGRRr6e5CjmvweIWwo5K0udZRWK1/3RojCNnuMnFnkgq7q/Bl3j5ou2Kt2H5bj8/k+zQKYEwmpCvt2XnaQTEmb0dtH5czX9DgzT2F2ajFC8AMAVsXRwZ4OTu9K/7zTUbYqvDry8wyVxfI62FT/Rz/sprC9irsLLX6pieDSHWAd
pmyNKze3RZ/5cwy1Kr26uYmMla8WsfIwiR1KHQDAKgOgpjW86MriXgprPwl9j7zbvR71bV6NL8vAklxfaVWD8p/PGyPtQRL6kpEfZkDOtG2SX9PM0IEHyzU7k/iUGlfz5EO2msovLBacHV3VsFd5PvztPH02qLnZpgewVOj5AQCrVXbRy5HhQfy6U30fhS+xer7utHBAE/rfmLb06SvN6KuhIbRpdJjGX3DlffG880I9ndoP5qVuWRV9bYlNpEHrY2johhNaPW/498L76xqLbVMzL5K+luy6QtYKPT8AYDPYwqmhQd78r+2yXBwdqKNcUKSNz19tQcPUfIk18PPQ6bhgXkLJ8GwYim3Wt7Pvl+el9OeSnvFjsgotFqyz22xeoMoVnfnknnPLzO3Dcouk5Nugqj2ahm+q4jzJ8yOwdmnb25VTUETWCj0/AGAz7O3tqFWgt1KPkC4Ki0q/LUICvdW/bjlfGg39PBR6o8AyJD5RXi399W+P82Tm8srJtTFy00lqPG8PTzRedeA6dfzsIAXP28vXRcst0Ox1/hUo0TfkOmbhS/6l5XuttydHWwh+AAAEZOWXJsM6lFPupe5htsbYp680NWTTwECmbotT2sZ6XljPzIxfz+l1bPkFWlkpPet5YTNLy88NtDNe/SSB8jG1UDm+IUgkROujb/EE7rUHb/IqOWkSNsuLm7YtjqIuq5+fyRoh+AEAICI/T8WKrojGfjwhenBoAE9AZUNqr7cJoGk9lKvH2HBB3aoVVa4x1roW5g0SG6FhpluPsxTuJ6epXxZDlzwf+SU6NFVUXKxQJccWlGXWH7pJ28/eozE/nCJbg5wfAIDnwc6WE4my+X1Yhc7h97vJ8iDmvlgyszT7q7hnEz8K8HajJvP3ynp+No0Ko87LTTv3C1i3gyqGsv53/A6fqDBTg1L8skEWG1JTN71DWRuPJlBqtuJEh2/9eJrOzetJyWm2OwcQen4AAIhodt/G9MnLTWn7pPaybUIJoGxbI39PhQkV2bBYYBU3+uL1luTh6kiRTfyUnqdNHlKdMr1IU7rX1+KdgDn9aoDqqjk7LtDxW6l0WyAnydBzEG0/e4+iBYKwL4y8dIe5IfgBACAiN2dHGta2Fvl5Ki+RIUR+4UtpwvOAljX4X8wdBBZfnfdiMHlVcNIo8blsAvXErqoXfFWnRUAlnZ4Huvt452X65tBNuvVIcYjL2jzLEV72IlNuyRChITdrWe8LwQ8AgA7kk5zlYxVWcdaipnLQEVDZjc7O7UE/jmmr9riuTvZKK9c76ri+hqvAZHlgXGk5BbRk9xUzl4GrXtVeU0729oI5R60W7RPc//6zHD7k1mqRfivOmwp+MgAAdCA/JFa2p4b1uGwZ25YOvd9VYTsLjDTxQa+GCvfLqzYTsuLVFrwnCkAXqdn5dEiuwuyvcyWLxhbITQEh7+Tt1HJnzbYkSHgGANCBfDwiNM+P0NCXJppW9+JDcPK0nXyOrXw/sHVNvsQCyx9ic9eAbWKJ0WX9HKucd6TtZNb7n1d8Sb3781kKraU83xUb5vr+SILaNcosEYIfAAC9e350O8YrITWospszbTiawO8PDKlJ70cq9vroInZ2REm77O2oXZ0qeh8PLJcmkzH2/uIITdIxb0zeqTtPqaxP/rls1CU0jAXDXgAAetJ0OKusrg19aYZcsLNsUHPy99Is4VodllgNIHX5QTq98/NZvY/zrsAxziYpB0TWAD0/AAB60nXFbQ8XR14Cf2pOBE9q1ja3Z92wEMrOL6Lpes5GDKCrrDzrXN8LPT8AAHrStuNnYf8m9GrrmtSlQVV+38fdhSq5OWv03H4tqstuN6rmSdUqlfYUsdXrf3krnHQhNDcRQHnYUiBlpaTnWnziM3p+AAD0xMrYtTGyfZAGx6xASanKXyxhQd709/PKm2KJhNrVrkL9W1Sn+r7u9I6WkyFGz+hKXT+P5redHPC3MBhG20+jqI5PRdr3XmdytNDPFYIfAAAd
sV/uGbmFGk+MqI2gKhUFgx8H+9IvE1Zgw/KNvhzSSuvjs2U8gnwqal1RFtHYlw5cfqj164G43HqcZbGBD4PgBwBARw38PIx2bHcX4V/Pit8nug8tlF1CQxOero60YWQbikt6Ri+tPabza4Pte6GRL1kyyw3LAABEjC2k2rSGJ33+aguF7fIrxOsztYq2cwfx15PLLVLn9TYBKh8LqqLdECFYp6ruLmTJ0PMDAGCBqleqQP+800l2ny2N8SyngGp6l6w6z2ga+zSv6UXn76YpbNOpOl+i2XIbLJF760nhuV8sOw0WzD39g6mg5wcAwAp4V3Sm2j4VFcrqWcKzJoSCFelxQgIryXprFvQL5rc/6tOImtXwUnqORM+1xkA8HCw8ukDPDwCAFZGPO/QZ9vL1KBmW2PZWOCWn5fKKNbYkx6uhAVTRxZHGdqxDdT7apfAc6RIG8vMRta1dmU4klKzrJITFWPLtbOzvSXeeZOvecLAKDjrOfWUqFh6bAQCAqlwdTWdylghUbH3Yq5GsxF2+VJ8FPiWvo1kbpvdUXo7Dw7W0XWUP4+RoT9XKmcWa9UaxUmmwXvYW3juInh8AACvz7fDWvMSe5QVp4rXQADqb+IwHHf9O70oVnB10SoiWaDi80aFeFcXjlOmiKm8m698mtOevVbdMzxNYj8zcQrJkCH4AAKxMzyb+Wu3P8nka+ntQI38PjQIfqV8nhFNmXiGN3nSS35ePYUaE16LbT7KpVYDiSt/xC3oqLfq67NUWWi3BYem9BqDZPD+WDMNeAAA2jgUjIYHe5Oas3d+7bYIqU7eGvrL8oHZ1SsvsFw1oSv/3ZphCoDKodU2FIS/+2mRHA1vXVNimT66SKa0dGmLuJlitYgv/T0bwAwAAav0+sT1NjahPK15rqXY/wWE4K+7E6d7YV+fKtk71fUjMii18bS8EPwAAoBZLiJ4a0YAqVxRefHXTqDb0WmhNmtCljtJjmoYOrIzf0rg6OVBhOV/iYzvWFtxu4R0foofgBwAA9NKtkS8tG9RC42G1ugIzRFd00TwXqTzyE0Eaw/QeDWS3Va1fZemrmhubJa/rxVh26wAAwKr5C5S1Lx/UnAaG1KQ/3+5Ap+dE0Jm5PfSeF8bbrTTXqHE1T8G10o580I0MoeXziSEZJwfhducVFpGYOWCeHwAAEJtt49tReJ0qtGFEKL//zgv1eAAyrUcD8vN0pRWvtaAWAZWoiruLyuE0Teye0onej2xIE7rUlW379OVmSvu92LyawnxG+pAf0mJDY0I61q/Kq+s8XMVZVG1n2bEPgh8AADC8tnWq0M/j21H95yvfs8kQz83vqTK3R9dBItbL83a3evRSqxr8fqvASlTVw4XcypT0a5KD06eZZlMIsEO91bkOXzPt1VDFSjapsZ1q88CM5UOVp4aG8zWB4YgzJAUAAJNTN7lhvaruSouvaoP1Jl1cGEkVnvfElA12JBqEV6wsXxNsmY9ZfRrz2wVFxUqPz+nbmDyfl/y3ruVNQ8IC6efYRJXHa1+3Ch269ogeZuRp9PqgP/T8AACA2c19MZgHCdsntS9339BaihMryi/NIZ13SD7Y8XF3pikRDWSzY7cMKM3ZYfo2r8avx3VWrlYTIh9GseVB1M1xw+ZYWvJKMz4EJlVPIOHbWOnR/xvT1khHtm4WEfysXbuWgoKCyNXVldq2bUuxsbEq9/3uu++oU6dO5O3tzS8RERFq9wcAAOtYtZ4FCWwyxvL4lbM2WNmen5OzI2RDS2x27B1vd+DzFjHvvlCPvhrSig/JsaCougbHLuuXt8Jp/RutZfcFOoP4ArJMnaoVaXwn5SBL1bDcKyElw3ma6NygqmCvElhg8LNt2zaaNm0azZ8/n86cOUMtWrSgyMhIevjwoeD+0dHRNGTIEDp48CDFxMRQQEAA9ezZk+7du2fytgMAgOl0b+RLP49rR/P7BVNkEz8+w7Qq8sGE0DplU7rXp8Pvd6P3ejTgj0sXiY1+vxuN
7hBEv08sCVY0EVa7MvVq6q92dmN2/EuLImnf1M5Kj5fcE45+ujfyE9z+3fNEcnlOAsOKrCesVhXDJHrbErMHPytXrqRx48bR6NGjKTg4mNavX09ubm60ceNGwf23bNlCkyZNopYtW1KjRo1ow4YNVFxcTFFRUYL75+XlUXp6usIFAACsz/ej2lB43Srk6+FK3wwPFezp0DTHhwU8gVXclAIjZ0d7mt+vCbWuVbqUh8DBBUl7WQa0rC74OJsHic1/IzQF0MjwIMHnNPR3p6jpXej8gp4K23sEKwdFZd8Lq6ZjVg1WPzO3GJk1+MnPz6fTp0/zoStZg+zt+X3Wq6OJ7OxsKigooMqVhT+oS5YsIS8vL9mF9RQBAIBtM+YMy44q5vZh+TUXFkZSTW/1PS1CPUOTutXjC8kqs6O6Vd1lCdTqdKinOMQ1OLTk+86YVec/qOh9Q6m7Go8fP6aioiLy81OMYNn95ORkjY7x4YcfUvXq1RUCKHmzZs2itLQ02SUpKckgbQcAAOMKqKx7CbgxYh82l1C3hlWpfV3hdbvYEBOby6jctpUJfl5qWYNXwrGFZMuqJpCDtHGU8pAXM7xdLcFEbnOo5mXZ5ftWXeq+dOlS2rp1K88DYsnSQlxcXPgFAACsy653O9Hagzdp/aGbWj+3bIBhCDN7NzLIceSXvmDzEnWUWwSVzULNZoeu5OZMhUUSXsEmxVJ62FNbB1Yud0kJFghJ85g0ERJYic4kPhN8bNPoNhSbkEpNqnvS5J/OKjzmrGIZi9l9S6YCsFRmDX58fHzIwcGBUlJSFLaz+/7+6ieb+vzzz3nwc+DAAWrevLmRWwoAAKbm4erEq7L2X0qW5a9oylChz6rBLWjm7/G0fnhpNZe+5HN+fp+gWNqvbhZqVpGWV1hMXnJLeWg7NKfK0oHNee/Ti18epZwCxaU5alV2o24NffntssFP29rKgdiwtoHk427ZnQ5mHfZydnam1q1bKyQrS5OXw8NVZ9ovW7aMFi9eTHv27KHQUOHuPwAAsH5s+YgD07rQyte0S9oNez6E5O+pfem6vJdb1aRLi3rJvvwNQT7nRzovkabBoKZBhSYTNrao6VXaDruS3KKyjy8b1JzqlNkuT6j91rCmq9mrvViZO5u754cffqDLly/TxIkTKSsri1d/MSNGjOB5O1KfffYZzZ07l1eDsbmBWG4Qu2RmZprxXQAAgLEIlaqXZ83QVjSpa10VScSGm5na0pKxpeSbrOrlJsithyZ0isd3rkuvPU+aVmdh/yZltlh+9GP24Gfw4MF8CGvevHm8fD0uLo736EiToBMTE+nBgwey/detW8erxAYNGkTVqlWTXdgxAAAAGFYO/0GvRgZbzNSQNFlqQ19sfTMhf0/uqHHbNF3rrGxitSmCO5tIeJ48eTK/CGHJzPJu375tolYBAAAYXqUKuq9iL++vyR2o/1fHFLZ9NbQVRV1+SCPbl84bJN+p07SGp2CPltDwYHk9bp8NbFZynDL7IfgBAAAABS+H1KDjCU+og4qSeU01r1mJrywvvyDsi82r84sqLKAZEV6LP6drQ1+K/ag75RcVy6rKNA1cBrWuSYPbBArm/QjNY2RpEPwAAACYEFsMVdsEbkNaNKCp7LZvmR6f8sKWA9M6019x92ms3CKwZXOiLD/0sYCcHwAAANANWwzWw9WR5r4YbJLXq+frQdN6NlSYcdoRPT8AAABgKk2qe9G5eT21KplXS4e4hZXJy3svogFZOvT8AAAAWLHyAp96vqrn6TFEJZr8sNfvE8MtssKuLPT8AAAA2DA2OeLpORHk4uRglOPbK8ReFr6i6XMIfgAAAGxcFQ1nhpboMOwlXxJvjDXVjAHDXgAAAMDpG7pYR+iD4AcAAAAMRN+11EwFw14AAACg17DVbxPCKTUr3yqSnRkEPwAAAMC5OTtSZl4haSs0qDJZEwx7AQAAAPfDm2FUq4obfT8ylGwZen4AAACAa13Lmw69341sHXp+
AAAAQFQQ/AAAAICoIPgBAAAAUUHwAwAAAKKC4AcAAABEBcEPAAAAiAqCHwAAABAVBD8AAAAgKgh+AAAAQFQQ/AAAAICoIPgBAAAAUUHwAwAAAKKC4AcAAABEBcEPAAAAiIojiYxEIuHX6enp5m4KAAAAaEj6vS39HteH6IKfjIwMfh0QEGDupgAAAIAO3+NeXl6kDzuJIUIoK1JcXEz3798nDw8PsrOzM3hUyoKqpKQk8vT0JLHCeSiB81AK56IEzkMJnIdSOBeanwcWrrDAp3r16mRvr1/Wjuh6ftgJq1mzplFfg/3HiflDLIXzUALnoRTORQmchxI4D6VwLjQ7D/r2+Egh4RkAAABEBcEPAAAAiAqCHwNycXGh+fPn82sxw3kogfNQCueiBM5DCZyHUjgX5jkPokt4BgAAAHFDzw8AAACICoIfAAAAEBUEPwAAACAqCH4AAABAVBD8GMjatWspKCiIXF1dqW3bthQbG0u2ZMGCBXxGbPlLo0aNZI/n5ubS22+/TVWqVCF3d3caOHAgpaSkKBwjMTGR+vbtS25ubuTr60vvv/8+FRYWkiU7fPgw9evXj88oyt7zjh07FB5n9QLz5s2jatWqUYUKFSgiIoKuX7+usE9qaioNGzaMT9xVqVIlGjNmDGVmZirsc/78eerUqRP//LBZTpctW0bWdi5GjRql9Bnp1auXzZ2LJUuWUJs2bfgs8exz/NJLL9HVq1cV9jHUz0N0dDSFhITwCph69erR5s2byZrOQ9euXZU+ExMmTLCp87Bu3Tpq3ry5bHK+8PBw2r17t6g+C5qeC4v6PLBqL9DP1q1bJc7OzpKNGzdKLl68KBk3bpykUqVKkpSUFImtmD9/vqRJkyaSBw8eyC6PHj2SPT5hwgRJQECAJCoqSnLq1ClJu3btJO3bt5c9XlhYKGnatKkkIiJCcvbsWcmuXbskPj4+klmzZkksGWvn7NmzJdu3b2dVkZI//vhD4fGlS5dKvLy8JDt27JCcO3dO0r9/f0nt2rUlOTk5sn169eoladGiheT48eOSI0eOSOrVqycZMmSI7PG0tDSJn5+fZNiwYZILFy5Ifv75Z0mFChUk33zzjcSazsXIkSP5e5X/jKSmpirsYwvnIjIyUrJp0ybevri4OEmfPn0kgYGBkszMTIP+PNy6dUvi5uYmmTZtmuTSpUuSNWvWSBwcHCR79uyRWMt56NKlC/99KP+ZYP/HtnQe/vrrL8nOnTsl165dk1y9elXy0UcfSZycnPh5EctnQdNzYUmfBwQ/BhAWFiZ5++23ZfeLiook1atXlyxZskRiS8EP+9IS8uzZM/4B//XXX2XbLl++zL8gY2Ji+H32Iba3t5ckJyfL9lm3bp3E09NTkpeXJ7EGZb/wi4uLJf7+/pLly5crnAsXFxf+pc2wH072vJMnT8r22b17t8TOzk5y7949fv/rr7+WeHt7K5yHDz/8UNKwYUOJpVIV/AwYMEDlc2z1XDx8+JC/r0OHDhn05+GDDz7gf3DIGzx4MA86rOE8SL/spkyZovI5tngeGPYZ3rBhg2g/C0LnwtI+Dxj20lN+fj6dPn2aD3fIrx/G7sfExJAtYcM5bMijTp06fOiCdU8y7P0XFBQonAM2JBYYGCg7B+y6WbNm5OfnJ9snMjKSL2Z38eJFskYJCQmUnJys8L7ZujNs2FP+fbPhndDQUNk+bH/2GTlx4oRsn86dO5Ozs7PCuWFDCE+fPiVrwrqjWVd1w4YNaeLEifTkyRPZY7Z6LtLS0vh15cqVDfrzwPaRP4Z0H0v9vVL2PEht2bKFfHx8qGnTpjRr1izKzs6WPWZr56GoqIi2bt1KWVlZfMhHrJ8FoXNhaZ8H0S1samiPHz/m/8ny/1kMu3/lyhWyFewLnY2rsi+1Bw8e0MKFC3lexoULF3gAwL6s2Bdb2XPAHmPYtdA5kj5mjaTtFnpf8u+bBQPyHB0d+ReE/D61a9dWOob0MW9vb7IGLL/nlVde
4e/l5s2b9NFHH1Hv3r35LyUHBwebPBfFxcU0depU6tChA/9lzhjq50HVPuyLICcnh+eYWfJ5YIYOHUq1atXifzSxXK4PP/yQB7Lbt2+3qfMQHx/Pv+BZfg/L6/njjz8oODiY4uLiRPdZiFdxLizt84DgBzTCvsSkWEIbC4bYh/iXX36xqB88MJ/XX39ddpv99cY+J3Xr1uW9Qd27dydbxBJZ2R8AR48eJTFTdR7Gjx+v8JlghQHss8CCY/bZsBXsj0IW6LDer99++41GjhxJhw4dIjFqqOJcsADIkj4PGPbSE+u+Y3/Vls3eZ/f9/f3JVrG/ZBo0aEA3btzg75MN/z179kzlOWDXQudI+pg1krZb3f89u3748KHC46xygVU92fK5YdjwKPv5YJ8RWzwXkydPpn/++YcOHjxINWvWlG031M+Dqn1YFY0l/cGh6jwIYX80MfKfCVs4D6x3h1UdtW7dmlfBtWjRgr744gvRfRbUnQtL+zwg+DHAfzT7T46KilLoAmb35cc5bQ0rT2bROovc2ft3cnJSOAesK5PlBEnPAbtm3aHyX3779+/nH1hpl6i1YcMz7AdR/n2zrleWvyL/vtkvPjb2L/Xvv//yz4j0B5/tw8rIWW6A/Llhf0FZ2jCPNu7evctzfthnxJbOBcv3Zl/4rDuftb/sMJ2hfh7YPvLHkO5jKb9XyjsPQliPACP/mbD28yCEfabz8vJE81nQ5FxY3OdBq/RoUFnqzip8Nm/ezCtaxo8fz0vd5TPWrd306dMl0dHRkoSEBMmxY8d4KSIrQWQVHtJyTlbm+u+///JyzvDwcH4pW8LYs2dPXhbLyhKrVq1q8aXuGRkZvOSSXdiPy8qVK/ntO3fuyErd2f/1n3/+KTl//jyvdhIqdW/VqpXkxIkTkqNHj0rq16+vUN7NKkJYeffw4cN5SSj7PLFSTksq7y7vXLDHZsyYwStY2GfkwIEDkpCQEP5ec3NzbepcTJw4kU9vwH4e5Et2s7OzZfsY4udBWtL7/vvv8wqhtWvXWlR5c3nn4caNG5JFixbx988+E+xnpE6dOpLOnTvb1HmYOXMmr3Bj75H9DmD3WQXjvn37RPNZ0ORcWNrnAcGPgbC5BtgHnM33w0rf2TwmtoSVElarVo2/vxo1avD77MMsxb7sJ02axMsa2Qfz5Zdf5r8I5d2+fVvSu3dvPm8LC5xYQFVQUCCxZAcPHuRf9GUvrKxbWu4+d+5c/oXNAuDu3bvz+S3kPXnyhH/Bu7u785LN0aNH82BBHpsjqGPHjvwY7PyyoMqazgX7wmO/sNgvKlbaW6tWLT6fR9k/AGzhXAidA3Zhc94Y+ueBnfOWLVvynzv2RSH/GpZ+HhITE/kXW+XKlfn/JZvTiX1hyc/rYgvn4c033+Sfd9Y29vlnvwOkgY9YPguanAtL+zzYsX+06ysCAAAAsF7I+QEAAABRQfADAAAAooLgBwAAAEQFwQ8AAACICoIfAAAAEBUEPwAAACAqCH4AAABAVBD8AAAAgKgg+AEAowkKCqLVq1drvD9bAd7Ozk5pIUhbpe35AQDDcDTQcQDABnTt2pVatmxpsC/kkydPUsWKFTXev3379vTgwQPy8vIyyOsDAAhB8AMAWmEr4hQVFZGjY/m/PqpWrarVsZ2dncnf31+P1gEAlA/DXgDAjRo1ig4dOkRffPEFH3pil9u3b8uGonbv3k2tW7cmFxcXOnr0KN28eZMGDBhAfn5+5O7uTm3atKEDBw6oHdZhx9mwYQO9/PLL5ObmRvXr16e//vpL5bDX5s2bqVKlSrR3715q3Lgxf51evXrx3iGpwsJCevfdd/l+VapUoQ8//JBGjhxJL730ktr3y95Dp06dqEKFChQQEMCPkZWVpdD2xYsX05AhQ3jvVY0aNWjt2rUKx0hMTOTngLXL09OTXnvtNUpJSVHY5++//+bnxtXVlXx8fPh7l5ednU1vvvkmeXh4UGBgIH377bca/o8BgK4Q/AAA
x4Ke8PBwGjduHA8u2IUFBVIzZ86kpUuX0uXLl6l58+aUmZlJffr0oaioKDp79iwPSvr168cDAnUWLlzIg4Tz58/z5w8bNoxSU1NV7s+Cg88//5x+/PFHOnz4MD/+jBkzZI9/9tlntGXLFtq0aRMdO3aM0tPTaceOHWrbwAI31t6BAwfydmzbto0HQ5MnT1bYb/ny5dSiRQv+/tj7nzJlCu3fv58/VlxczAMf1nYWNLLtt27dosGDB8uev3PnTh7ssPfJjsHOVVhYmMJrrFixgkJDQ/njkyZNookTJ9LVq1fVth8A9KT1OvAAYLO6dOkimTJlisK2gwcPStivih07dpT7/CZNmkjWrFkju1+rVi3JqlWrZPfZcebMmSO7n5mZybft3r1b4bWePn3K72/atInfv3Hjhuw5a9eulfj5+cnus9vLly+X3S8sLJQEBgZKBgwYoLKdY8aMkYwfP15h25EjRyT29vaSnJwcWdt79eqlsM/gwYMlvXv35rf37dsncXBwkCQmJsoev3jxIm9vbGwsvx8eHi4ZNmyYynaw13jjjTdk94uLiyW+vr6SdevWqXwOAOgPPT8AoBHWOyGP9fywHhg2HMWGnNjQD+sVKq/nh/UaSbHhJDZc9PDhQ5X7s+GxunXryu5Xq1ZNtn9aWhofZpLvTXFwcODDc+qcO3eOD6mxNksvkZGRvDcnISFBth/rCZPH7rP3yLBr1jMm3zsWHBzMz4V0n7i4OOrevbvG54MN+bGcJ3XnAwD0h4RnANBI2aotFviwoR42JFWvXj2eOzNo0CDKz89XexwnJyeF++wLnwUd2uxf0omkOxa4vfXWWzzPpyyWd2Mo7JyUR9vzAQD6Q88PAChUW7FKLk2w/BqWJM1yWpo1a8Z7LFiCtCmxkniWcM1K6qVY+8+cOaP2eSEhIXTp0iUetJW9sHMgdfz4cYXnsfusp4th10lJSfwixY7JkrVZD5C0V4fl+QCAZUHPDwAoVDidOHGCBzFsKKhy5coq92WVWtu3b+dJzqy3Yu7cuWbpsXjnnXdoyZIlPHBp1KgRrVmzhp4+fcrbpAqrCGvXrh1PcB47dizv1WKBC+vJ+uqrrxQCvGXLlvHKMfbYr7/+ypOYmYiICB70sYRtVtHGqs5YwnKXLl1kQ4Tz58/nw15s2O7111/n++zatYu/PgCYD3p+AEBhKIvlzLCeCzZHj7r8nZUrV5K3tzefmJAFQCxnhvWomBoLJFg5+ogRI3hOjjR/h5WWq8J6ZFiF1rVr13i5e6tWrWjevHlUvXp1hf2mT59Op06d4o9//PHH/D2zYzMsuPrzzz/5OejcuTMPhurUqcMrx+QnjWQBEyvnZ5NHvvDCCxQbG2vEswEAmrBjWc8a7QkAYAVY7xMbkmLl9GyeHn16waZOncovAGBbMOwFAFbtzp07tG/fPj7clJeXx4etWMXW0KFDzd00ALBQGPYCAKtmb2/Py9bZLModOnSg+Ph4PtO0NDEZAKAsDHsBAACAqKDnBwAAAEQFwQ8AAACICoIfAAAAEBUEPwAAACAqCH4AAABAVBD8AAAAgKgg+AEAAABRQfADAAAAJCb/D7hmRI1sak9PAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "eval_loss = 0.2871\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from torch import nn\n",
    "\n",
    "class LR(nn.Module):\n",
    "    \"\"\"Single linear layer producing class logits, with an optional loss.\n",
    "\n",
    "    Args:\n",
    "        input_dim: size of each input feature vector.\n",
    "        output_dim: number of logits produced per example.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_dim, output_dim):\n",
    "        super().__init__()\n",
    "        self.linear = nn.Linear(input_dim, output_dim)\n",
    "\n",
    "    def forward(self, input_feats, labels=None):\n",
    "        \"\"\"Return the logits, or ``(loss, logits)`` when ``labels`` is given.\"\"\"\n",
    "        logits = self.linear(input_feats)\n",
    "        if labels is None:\n",
    "            return logits\n",
    "        # Functional form of nn.CrossEntropyLoss() with its default settings.\n",
    "        loss = nn.functional.cross_entropy(logits, labels)\n",
    "        return (loss, logits)\n",
    "\n",
    "# One input feature per entry of dataset.token2id and one output per\n",
    "# entry of dataset.label2id.\n",
    "model = LR(len(dataset.token2id), len(dataset.label2id))\n",
    "\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torch.optim import SGD, Adam\n",
    "\n",
    "# PyTorch's DataLoader drives the data loop, so the myDataset and\n",
    "# DataCollator classes are written against PyTorch's interfaces.\n",
    "# myDataset is a thin wrapper pairing feature vectors with labels;\n",
    "# DataCollator converts a batch of samples into PyTorch tensors.\n",
    "class myDataset(Dataset):\n",
    "    \"\"\"Dataset whose i-th item is the pair ``(X[i], Y[i])``.\"\"\"\n",
    "\n",
    "    def __init__(self, X, Y):\n",
    "        self.X, self.Y = X, Y\n",
    "\n",
    "    def __len__(self):\n",
    "        # The length is taken from X only.\n",
    "        return len(self.X)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        feature, label = self.X[idx], self.Y[idx]\n",
    "        return (feature, label)\n",
    "\n",
    "class DataCollator:\n",
    "    \"\"\"Holder for the collate function handed to DataLoader.\"\"\"\n",
    "\n",
    "    @classmethod\n",
    "    def collate_batch(cls, batch):\n",
    "        \"\"\"Turn an iterable of ``(features, label)`` pairs into a tensor dict.\n",
    "\n",
    "        Returns:\n",
    "            A dict with a float tensor under 'input_feats' and a long tensor\n",
    "            under 'labels'.\n",
    "        \"\"\"\n",
    "        samples = list(batch)\n",
    "        # Building a tensor straight from a list of ndarrays is very slow,\n",
    "        # so pack everything into a single ndarray first.\n",
    "        feat_array = np.array([x for x, _ in samples])\n",
    "        label_array = np.array([y for _, y in samples])\n",
    "        return {\n",
    "            'input_feats': torch.tensor(feat_array, dtype=torch.float),\n",
    "            'labels': torch.tensor(label_array, dtype=torch.long),\n",
    "        }\n",
    "\n",
    "# Training hyperparameters\n",
    "epochs, batch_size = 50, 128\n",
    "learning_rate, weight_decay = 1e-3, 0\n",
    "\n",
    "# Wrap the features and labels of the train and test splits\n",
    "train_dataset = myDataset(train_F, train_Y)\n",
    "test_dataset = myDataset(test_F, test_Y)\n",
    "\n",
    "# Only the training loader shuffles; both collate through DataCollator\n",
    "data_collator = DataCollator()\n",
    "train_dataloader = DataLoader(\n",
    "    train_dataset,\n",
    "    batch_size=batch_size,\n",
    "    shuffle=True,\n",
    "    collate_fn=data_collator.collate_batch,\n",
    ")\n",
    "test_dataloader = DataLoader(\n",
    "    test_dataset,\n",
    "    batch_size=batch_size,\n",
    "    shuffle=False,\n",
    "    collate_fn=data_collator.collate_batch,\n",
    ")\n",
    "\n",
    "# Optimizer over all model parameters\n",
    "optimizer = Adam(\n",
    "    model.parameters(),\n",
    "    lr=learning_rate,\n",
    "    weight_decay=weight_decay,\n",
    ")\n",
    "\n",
    "# Clear gradients and switch to training mode before the loop\n",
    "model.zero_grad()\n",
    "model.train()\n",
    "\n",
    "from tqdm import tqdm, trange\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Model training\n",
    "# NOTE: despite its name, epoch_loss collects one loss value per training\n",
    "# step (mini-batch), not one value per epoch.\n",
    "epoch_loss = []\n",
    "with trange(epochs, desc='epoch', ncols=60) as pbar:\n",
    "    for epoch in pbar:\n",
    "        model.train()\n",
    "        for batch in train_dataloader:\n",
    "            # With labels in the batch the model returns (loss, logits).\n",
    "            loss = model(**batch)[0]\n",
    "            pbar.set_description(f'epoch-{epoch}, loss={loss.item():.4f}')\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            model.zero_grad()\n",
    "            epoch_loss.append(loss.item())\n",
    "\n",
    "epoch_loss = np.array(epoch_loss)\n",
    "# Plot the loss curve. The x axis counts optimisation steps, because one\n",
    "# value is recorded per mini-batch.\n",
    "plt.plot(range(len(epoch_loss)), epoch_loss)\n",
    "plt.xlabel('training step')\n",
    "plt.ylabel('loss')\n",
    "plt.show()\n",
    "\n",
    "# Evaluate on the test loader; the reported value is the mean of the\n",
    "# per-batch losses.\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    loss_terms = []\n",
    "    for batch in test_dataloader:\n",
    "        loss = model(**batch)[0]\n",
    "        loss_terms.append(loss.item())\n",
    "print(f'eval_loss = {np.mean(loss_terms):.4f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10808854",
   "metadata": {},
   "source": [
    "下面的代码使用训练好的模型对测试集进行预测，并报告分类结果。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "11a9bf62",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "test example-0, prediction = 0, label = 0\n",
      "test example-1, prediction = 0, label = 0\n",
      "test example-2, prediction = 1, label = 1\n",
      "test example-3, prediction = 1, label = 1\n",
      "test example-4, prediction = 1, label = 1\n"
     ]
    }
   ],
   "source": [
    "# Collect the arg-max class of every test example, batch by batch.\n",
    "LR_preds = []\n",
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    for batch in test_dataloader:\n",
    "        # The batch carries labels, so the model returns (loss, logits).\n",
    "        _, logits = model(**batch)\n",
    "        # Use torch's own argmax and store plain Python ints, rather than\n",
    "        # passing a torch.Tensor through np.argmax.\n",
    "        LR_preds.extend(logits.argmax(dim=1).tolist())\n",
    "\n",
    "# Show the first five predictions next to their labels.\n",
    "for i, (p, y) in enumerate(zip(LR_preds[:5], test_Y)):\n",
    "    print(f'test example-{i}, prediction = {p}, label = {y}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c5feb65e",
   "metadata": {},
   "source": [
    "下面的代码展示多分类情况下宏平均和微平均的算法。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "a5ac32c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NB: micro-f1 = 0.8961520630505331, macro-f1 = 0.8948572078813896\n",
      "LR: micro-f1 = 0.9151599443671766, macro-f1 = 0.9143871512947528\n"
     ]
    }
   ],
   "source": [
    "# Convert the labels and both prediction sequences to ndarrays so that\n",
    "# they can be compared element-wise.\n",
    "test_Y, NB_preds, LR_preds = (\n",
    "    np.array(seq) for seq in (test_Y, NB_preds, LR_preds)\n",
    ")\n",
    "\n",
    "def micro_f1(preds, labels, num_classes=None):\n",
    "    \"\"\"Micro-averaged F1: counts are pooled over all classes before scoring.\n",
    "\n",
    "    Args:\n",
    "        preds: ndarray of predicted class ids.\n",
    "        labels: ndarray of gold class ids, same shape as ``preds``.\n",
    "        num_classes: number of classes; class ids are ``range(num_classes)``.\n",
    "            Defaults to ``len(dataset.label2id)``.\n",
    "\n",
    "    Returns:\n",
    "        The F1 score, or 0.0 when precision or recall is undefined.\n",
    "    \"\"\"\n",
    "    if num_classes is None:\n",
    "        num_classes = len(dataset.label2id)\n",
    "    TP = np.sum(preds == labels)\n",
    "    FP = FN = 0\n",
    "    for i in range(num_classes):\n",
    "        # Predicted i but the gold label differs: false positive for class i.\n",
    "        FP += np.sum((preds == i) & (labels != i))\n",
    "        # Gold label is i but another class was predicted: false negative.\n",
    "        FN += np.sum((preds != i) & (labels == i))\n",
    "    # Guard the denominators so empty input yields 0.0 instead of NaN.\n",
    "    precision = TP / (TP + FP) if TP + FP > 0 else 0.0\n",
    "    recall = TP / (TP + FN) if TP + FN > 0 else 0.0\n",
    "    if precision + recall == 0:\n",
    "        return 0.0\n",
    "    return 2 * precision * recall / (precision + recall)\n",
    "\n",
    "def macro_f1(preds, labels, num_classes=None):\n",
    "    \"\"\"Macro-averaged F1: the unweighted mean of the per-class F1 scores.\n",
    "\n",
    "    Args:\n",
    "        preds: ndarray of predicted class ids.\n",
    "        labels: ndarray of gold class ids, same shape as ``preds``.\n",
    "        num_classes: number of classes; class ids are ``range(num_classes)``.\n",
    "            Defaults to ``len(dataset.label2id)``.\n",
    "\n",
    "    Returns:\n",
    "        The mean per-class F1. A class whose precision or recall is\n",
    "        undefined contributes 0.0 rather than NaN.\n",
    "    \"\"\"\n",
    "    if num_classes is None:\n",
    "        num_classes = len(dataset.label2id)\n",
    "    f_scores = []\n",
    "    for i in range(num_classes):\n",
    "        TP = np.sum((preds == i) & (labels == i))\n",
    "        # Predicted i but the gold label differs: false positive.\n",
    "        FP = np.sum((preds == i) & (labels != i))\n",
    "        # Gold label is i but another class was predicted: false negative.\n",
    "        FN = np.sum((preds != i) & (labels == i))\n",
    "        # Guard every denominator: a 0/0 here would turn the mean into NaN.\n",
    "        precision = TP / (TP + FP) if TP + FP > 0 else 0.0\n",
    "        recall = TP / (TP + FN) if TP + FN > 0 else 0.0\n",
    "        if precision + recall == 0:\n",
    "            f1 = 0.0\n",
    "        else:\n",
    "            f1 = 2 * precision * recall / (precision + recall)\n",
    "        f_scores.append(f1)\n",
    "    return np.mean(f_scores)\n",
    "\n",
    "# Report both averages for the NB and LR predictions.\n",
    "for tag, model_preds in (('NB', NB_preds), ('LR', LR_preds)):\n",
    "    micro = micro_f1(model_preds, test_Y)\n",
    "    macro = macro_f1(model_preds, test_Y)\n",
    "    print(f'{tag}: micro-f1 = {micro}, macro-f1 = {macro}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "937fdc5c-3b90-455a-91f4-1202fbf514dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Torch version: 2.7.0+cu118\n",
      "Numpy version: 2.2.5\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import numpy\n",
    "\n",
    "# Print the installed versions of both libraries.\n",
    "for label, module in ((\"Torch version:\", torch), (\"Numpy version:\", numpy)):\n",
    "    print(label, module.__version__)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
